Fragestellung: “Die Manschafft, die zur Halbzeit vorne liegt, gewinnt mit einer Chance von mindestens 75% auch das Spiel. Falls zur Halbzeit unentschieden ist, gewinnt eher das Heimteam.”

Dafür nehmen wir den Datacamp Datensatz Soccer Data

Als Einführung werden wir auf Datacamp folgende Kurse durchgehen:

# Bibliotheken importieren
library("plotly")
library("plyr")
library("dplyr")
library("forcats")

Daten einlesen und Dataframe erstellen

# List files in folder "Data"
files <- list.files(path="./Data/", pattern=NULL, all.files=FALSE, full.names=TRUE)

# Create Dataframe with all csv from years 2015-2019
df <- ldply(.data = files, .fun = read.csv)
# Count frequency of haltime & fulltime results
df_htr <- df %>% count(HTR)
df_ftr <- df %>% count(FTR)
# Create dataframe with halttime & fulltime result amounts
df_results <- data.frame(c("Away", "Draw", "Home"), c(df_htr$n), c(df_ftr$n))

# Rename column headers
col_headings <- c('Result','Halftime','Fulltime')
names(df_results) <- col_headings

# Plot grouped bar chart to visualize halftime & fulltime results
fig <- plot_ly(
  df_results, x = ~Results, y = ~Halftime, type = 'bar', name = 'Half Time Score') %>% 
  add_trace(y = ~Fulltime, name = 'Full Time Score') %>%
  layout(yaxis = list(title = 'Count'), barmode = 'group')

fig
NA
# merge HTR & FTR to 1 column
df$result <- paste(df$HTR, df$FTR)

print("Example: H H = A home team is winning at halftime and also wins the game at fulltime")
[1] "Example: H H = A home team is winning at halftime and also wins the game at fulltime"
# Plot all different game progresses and their amount
df %>%
  count(result) %>%
  mutate(result = fct_reorder(result, n, .desc = TRUE)) %>%
  plot_ly(x = ~result, y = ~n, text = ~n, textposition = 'auto') %>%
  add_bars() %>%
  layout(xaxis = list(title = "Game Progress"),
         yaxis = list(title = "Amount"),
         title = "How are the different game progresses distributed?")
# Group by game outcome & calculate probability of all outcomes
df_count_results <- df %>% 
  group_by(result) %>% 
  summarise(count_result = round(n() / nrow(df) * 100, digits = 2))

df_count_results %>%
  mutate(result = fct_reorder(result, count_result, .desc = TRUE)) %>%
  plot_ly(x = ~result, y = ~count_result, text = ~count_result, textposition = 'auto') %>%
  add_bars() %>%
  layout(xaxis = list(title = "Game Progress"),
         yaxis = list(title = "Probability %"),
         title = "What is the probability of each game progress?")
# Calculate probability 
calc_prob <- function(df1, df2) {
  prob <- round((100 / nrow(df1) * nrow(df2)), digits = 2)
  return(prob)
}
# Filter home teams winning at halftime
df_ht_home <- df %>% 
  filter(HTR == "H")

# Filter home teams winning at halftime & fulltime
df_ft_home <- df_ht_home %>% 
  filter(FTR == "H")

# Chance Heimteam führt zur Halbzeit und gewinnt das Spiel
home_win_prob <- calc_prob(df_ht_home, df_ft_home)
cat("Probability that the home team wins the game if they are leading at half time: ", home_win_prob, "%")
Probability that the home team wins the game if they are leading at half time:  82.55 %
#cat(nrow(df_ft_home))
# Filter away teams winning at halftime
df_ht_away <- df %>% 
  filter(HTR == "A")

# Filter away teams winning at halftime & fulltime
df_ft_away <- df_ht_away %>% 
  filter(FTR == "A")

# Chance Auswärtsteam führt zur Halbzeit und gewinnt das Spiel
away_win_prob <- calc_prob(df_hf_away, df_ft_away)
cat("Probability that the away team wins the game if they are leading at half time: ", away_win_prob, "%")
Probability that the away team wins the game if they are leading at half time:  72.03 %
#print(nrow(df_ft_away))
# Filter draw at halftime
df_ht_draw <- df %>% 
  filter(HTR == "D")

# Filter draw at halftime & fulltime
df_ft_draw <- df_ht_draw %>% 
  filter(FTR == "D")

# Chance Unentschieden zur Halbzeit und auch am Ende des Spiels
draw_prob <- calc_prob(df_hf_draw, df_ft_draw)
cat("Probability that the game ends in a draw if the halftime result is also a draw: ", away_win_prob, "%")
Probability that the game ends in a draw if the halftime result is also a draw:  72.03 %
#print(nrow(df_ft_draw))
# Filter draw at halftime & the home team winning at fulltime
df_ht_draw_ft_home_win <- df_ht_draw %>%
  filter(FTR == "H")

home_win_after_ht_draw_prob <- calc_prob(df_hf_draw, df_ht_draw_ft_home_win)
cat("Probability that the home team wins if the halftime result is a draw: ", home_win_after_ht_draw_prob, "%")
Probability that the home team wins if the halftime result is a draw:  38.03 %
LS0tDQp0aXRsZTogIkRhdGF2aXogbWl0IFBsb3RseSBQTCBEYXRhIg0Kb3V0cHV0OiBodG1sX25vdGVib29rDQotLS0NCg0KIyMgRnJhZ2VzdGVsbHVuZzogIkRpZSBNYW5zY2hhZmZ0LCBkaWUgenVyIEhhbGJ6ZWl0IHZvcm5lIGxpZWd0LCBnZXdpbm50IG1pdCBlaW5lciBDaGFuY2Ugdm9uIG1pbmRlc3RlbnMgNzUlIGF1Y2ggZGFzIFNwaWVsLiBGYWxscyB6dXIgSGFsYnplaXQgdW5lbnRzY2hpZWRlbiBpc3QsIGdld2lubnQgZWhlciBkYXMgSGVpbXRlYW0uIg0KDQoNCkRhZsO8ciBuZWhtZW4gd2lyIGRlbiBEYXRhY2FtcCBEYXRlbnNhdHogW1NvY2NlciBEYXRhXShodHRwczovL2FwcC5kYXRhY2FtcC5jb20vd29ya3NwYWNlL2RhdGFzZXRzL2RhdGFzZXQtcHl0aG9uLXNvY2NlcikNCg0KQWxzIEVpbmbDvGhydW5nIHdlcmRlbiB3aXIgYXVmIERhdGFjYW1wIGZvbGdlbmRlIEt1cnNlIGR1cmNoZ2VoZW46DQoNCi0gW0ludGVyYWN0aXZlIERhdGEgVmlzdWFsaXphdGlvbiB3aXRoIHBsb3RseV0oaHR0cHM6Ly9hcHAuZGF0YWNhbXAuY29tL2xlYXJuL2NvdXJzZXMvaW50ZXJhY3RpdmUtZGF0YS12aXN1YWxpemF0aW9uLXdpdGgtcGxvdGx5LWluLXIpDQoNCi0gW0ludGVybWVkaWF0ZSBJbnRlcmFjdGl2ZSBEYXRhIFZpc3VhbGl6YXRpb24gd2l0aCBwbG90bHldKGh0dHBzOi8vYXBwLmRhdGFjYW1wLmNvbS9sZWFybi9jb3Vyc2VzL2ludGVyYWN0aXZlLWRhdGEtdmlzdWFsaXphdGlvbi13aXRoLXBsb3RseS1pbi1yKQ0KDQpgYGB7cn0NCiMgQmlibGlvdGhla2VuIGltcG9ydGllcmVuDQpsaWJyYXJ5KCJwbG90bHkiKQ0KbGlicmFyeSgicGx5ciIpDQpsaWJyYXJ5KCJkcGx5ciIpDQpsaWJyYXJ5KCJmb3JjYXRzIikNCmBgYA0KIyMgRGF0ZW4gZWlubGVzZW4gdW5kIERhdGFmcmFtZSBlcnN0ZWxsZW4NCmBgYHtyfQ0KIyBMaXN0IGZpbGVzIGluIGZvbGRlciAiRGF0YSINCmZpbGVzIDwtIGxpc3QuZmlsZXMocGF0aD0iLi9EYXRhLyIsIHBhdHRlcm49TlVMTCwgYWxsLmZpbGVzPUZBTFNFLCBmdWxsLm5hbWVzPVRSVUUpDQoNCiMgQ3JlYXRlIERhdGFmcmFtZSB3aXRoIGFsbCBjc3YgZnJvbSB5ZWFycyAyMDE1LTIwMTkNCmRmIDwtIGxkcGx5KC5kYXRhID0gZmlsZXMsIC5mdW4gPSByZWFkLmNzdikNCmBgYA0KDQpgYGB7cn0NCiMgQ291bnQgZnJlcXVlbmN5IG9mIGhhbHRpbWUgJiBmdWxsdGltZSByZXN1bHRzDQpkZl9odHIgPC0gZGYgJT4lIGNvdW50KEhUUikNCmRmX2Z0ciA8LSBkZiAlPiUgY291bnQoRlRSKQ0KYGBgDQoNCmBgYHtyfQ0KIyBDcmVhdGUgZGF0YWZyYW1lIHdpdGggaGFsdHRpbWUgJiBmdWxsdGltZSByZXN1bHQgYW1vdW50cw0KZGZfcmVzdWx0cyA8LSBkYXRhLmZyYW1lKGMoIkF3YXkiLCAiRHJhdyIsICJIb21lIiksIGMoZGZfaHRyJG4pLCBjKGRmX2Z0ciRuKSkNCg0KIyBSZW5hbWUgY29sdW1uIGhlYWRlcnMNCmNvbF9oZWFkaW5ncyA8LSBjKCdSZXN1bHQnLCdIYWxmdGltZScsJ0Z1bGx0aW1lJykNCm5hbWVzKGRmX3Jlc3VsdHMpIDwtIGNvbF9oZWFkaW5ncw0KDQojIFBsb3QgZ3JvdXBlZCBiYXIgY2hhcnQgdG8gdmlzdWFsaXplIGhhbGZ0aW1lICYgZnVsbHRpbWUgcmVzdWx0cw0KZmlnIDwtIHBsb3RfbHkoDQogIGRmX3Jlc3VsdHMsIHggPSB+UmVzdWx0cywgeSA9IH5IYWxmdGltZSwgdHlwZSA9ICdiYXInLCBuYW1lID0gJ0hhbGYgVGltZSBTY29yZScpICU+JSANCiAgYWRkX3RyYWNlKHkgPSB+RnVsbHRpbWUsIG5hbWUgPSAnRnVsbCBUaW1lIFNjb3JlJykgJT4lDQogIGxheW91dCh5YXhpcyA9IGxpc3QodGl0bGUgPSAnQ291bnQnKSwgYmFybW9kZSA9ICdncm91cCcpDQoNCmZpZw0KYGBgDQpgYGB7cn0NCiMgbWVyZ2UgSFRSICYgRlRSIHRvIDEgY29sdW1uDQpkZiRyZXN1bHQgPC0gcGFzdGUoZGYkSFRSLCBkZiRGVFIpDQoNCnByaW50KCJFeGFtcGxlOiBIIEggPSBob21lIHRlYW0gaXMgd2lubmluZyBhdCBoYWxmdGltZSBhbmQgYWxzbyB3aW5zIHRoZSBnYW1lIGF0IGZ1bGx0aW1lIikNCmBgYA0KDQpgYGB7cn0NCiMgUGxvdCBhbGwgZGlmZmVyZW50IGdhbWUgcHJvZ3Jlc3NlcyBhbmQgdGhlaXIgYW1vdW50DQpkZiAlPiUNCiAgY291bnQocmVzdWx0KSAlPiUNCiAgbXV0YXRlKHJlc3VsdCA9IGZjdF9yZW9yZGVyKHJlc3VsdCwgbiwgLmRlc2MgPSBUUlVFKSkgJT4lDQogIHBsb3RfbHkoeCA9IH5yZXN1bHQsIHkgPSB+biwgdGV4dCA9IH5uLCB0ZXh0cG9zaXRpb24gPSAnYXV0bycpICU+JQ0KICBhZGRfYmFycygpICU+JQ0KICBsYXlvdXQoeGF4aXMgPSBsaXN0KHRpdGxlID0gIkdhbWUgUHJvZ3Jlc3MiKSwNCiAgICAgICAgIHlheGlzID0gbGlzdCh0aXRsZSA9ICJBbW91bnQiKSwNCiAgICAgICAgIHRpdGxlID0gIkhvdyBhcmUgdGhlIGRpZmZlcmVudCBnYW1lIHByb2dyZXNzZXMgZGlzdHJpYnV0ZWQ/IikNCmBgYA0KYGBge3J9DQojIEdyb3VwIGJ5IGdhbWUgb3V0Y29tZSAmIGNhbGN1bGF0ZSBwcm9iYWJpbGl0eSBvZiBhbGwgb3V0Y29tZXMNCmRmX2NvdW50X3Jlc3VsdHMgPC0gZGYgJT4lIA0KICBncm91cF9ieShyZXN1bHQpICU+JSANCiAgc3VtbWFyaXNlKGNvdW50X3Jlc3VsdCA9IHJvdW5kKG4oKSAvIG5yb3coZGYpICogMTAwLCBkaWdpdHMgPSAyKSkNCg0KZGZfY291bnRfcmVzdWx0cyAlPiUNCiAgbXV0YXRlKHJlc3VsdCA9IGZjdF9yZW9yZGVyKHJlc3VsdCwgY291bnRfcmVzdWx0LCAuZGVzYyA9IFRSVUUpKSAlPiUNCiAgcGxvdF9seSh4ID0gfnJlc3VsdCwgeSA9IH5jb3VudF9yZXN1bHQsIHRleHQgPSB+Y291bnRfcmVzdWx0LCB0ZXh0cG9zaXRpb24gPSAnYXV0bycpICU+JQ0KICBhZGRfYmFycygpICU+JQ0KICBsYXlvdXQoeGF4aXMgPSBsaXN0KHRpdGxlID0gIkdhbWUgUHJvZ3Jlc3MiKSwNCiAgICAgICAgIHlheGlzID0gbGlzdCh0aXRsZSA9ICJQcm9iYWJpbGl0eSAlIiksDQogICAgICAgICB0aXRsZSA9ICJXaGF0IGlzIHRoZSBwcm9iYWJpbGl0eSBvZiBlYWNoIGdhbWUgcHJvZ3Jlc3M/IikNCmBgYA0KDQoNCg0KYGBge3J9DQojIENhbGN1bGF0ZSBwcm9iYWJpbGl0eSANCmNhbGNfcHJvYiA8LSBmdW5jdGlvbihkZjEsIGRmMikgew0KICBwcm9iIDwtIHJvdW5kKCgxMDAgLyBucm93KGRmMSkgKiBucm93KGRmMikpLCBkaWdpdHMgPSAyKQ0KICByZXR1cm4ocHJvYikNCn0NCmBgYA0KDQpgYGB7cn0NCiMgRmlsdGVyIGhvbWUgdGVhbXMgd2lubmluZyBhdCBoYWxmdGltZQ0KZGZfaHRfaG9tZSA8LSBkZiAlPiUgDQogIGZpbHRlcihIVFIgPT0gIkgiKQ0KDQojIEZpbHRlciBob21lIHRlYW1zIHdpbm5pbmcgYXQgaGFsZnRpbWUgJiBmdWxsdGltZQ0KZGZfZnRfaG9tZSA8LSBkZl9odF9ob21lICU+JSANCiAgZmlsdGVyKEZUUiA9PSAiSCIpDQoNCmhvbWVfd2luX3Byb2IgPC0gY2FsY19wcm9iKGRmX2h0X2hvbWUsIGRmX2Z0X2hvbWUpDQpjYXQoIlByb2JhYmlsaXR5IHRoYXQgdGhlIGhvbWUgdGVhbSB3aW5zIHRoZSBnYW1lIGlmIHRoZXkgYXJlIGxlYWRpbmcgYXQgaGFsZiB0aW1lOiAiLCBob21lX3dpbl9wcm9iLCAiJSIpDQoNCiNjYXQobnJvdyhkZl9mdF9ob21lKSkNCmBgYA0KDQpgYGB7cn0NCiMgRmlsdGVyIGF3YXkgdGVhbXMgd2lubmluZyBhdCBoYWxmdGltZQ0KZGZfaHRfYXdheSA8LSBkZiAlPiUgDQogIGZpbHRlcihIVFIgPT0gIkEiKQ0KDQojIEZpbHRlciBhd2F5IHRlYW1zIHdpbm5pbmcgYXQgaGFsZnRpbWUgJiBmdWxsdGltZQ0KZGZfZnRfYXdheSA8LSBkZl9odF9hd2F5ICU+JSANCiAgZmlsdGVyKEZUUiA9PSAiQSIpDQoNCmF3YXlfd2luX3Byb2IgPC0gY2FsY19wcm9iKGRmX2hmX2F3YXksIGRmX2Z0X2F3YXkpDQpjYXQoIlByb2JhYmlsaXR5IHRoYXQgdGhlIGF3YXkgdGVhbSB3aW5zIHRoZSBnYW1lIGlmIHRoZXkgYXJlIGxlYWRpbmcgYXQgaGFsZiB0aW1lOiAiLCBhd2F5X3dpbl9wcm9iLCAiJSIpDQpgYGANCg0KYGBge3J9DQojIEZpbHRlciBkcmF3IGF0IGhhbGZ0aW1lDQpkZl9odF9kcmF3IDwtIGRmICU+JSANCiAgZmlsdGVyKEhUUiA9PSAiRCIpDQoNCiMgRmlsdGVyIGRyYXcgYXQgaGFsZnRpbWUgJiBmdWxsdGltZQ0KZGZfZnRfZHJhdyA8LSBkZl9odF9kcmF3ICU+JSANCiAgZmlsdGVyKEZUUiA9PSAiRCIpDQoNCmRyYXdfcHJvYiA8LSBjYWxjX3Byb2IoZGZfaGZfZHJhdywgZGZfZnRfZHJhdykNCmNhdCgiUHJvYmFiaWxpdHkgdGhhdCB0aGUgZ2FtZSBlbmRzIGluIGEgZHJhdyBpZiB0aGUgaGFsZnRpbWUgcmVzdWx0IGlzIGFsc28gYSBkcmF3OiAiLCBhd2F5X3dpbl9wcm9iLCAiJSIpDQpgYGANCg0KYGBge3J9DQojIEZpbHRlciBkcmF3IGF0IGhhbGZ0aW1lICYgdGhlIGhvbWUgdGVhbSB3aW5uaW5nIGF0IGZ1bGx0aW1lDQpkZl9odF9kcmF3X2Z0X2hvbWVfd2luIDwtIGRmX2h0X2RyYXcgJT4lDQogIGZpbHRlcihGVFIgPT0gIkgiKQ0KDQpob21lX3dpbl9hZnRlcl9odF9kcmF3X3Byb2IgPC0gY2FsY19wcm9iKGRmX2hmX2RyYXcsIGRmX2h0X2RyYXdfZnRfaG9tZV93aW4pDQpjYXQoIlByb2JhYmlsaXR5IHRoYXQgdGhlIGhvbWUgdGVhbSB3aW5zIGlmIHRoZSBoYWxmdGltZSByZXN1bHQgaXMgYSBkcmF3OiAiLCBob21lX3dpbl9hZnRlcl9odF9kcmF3X3Byb2IsICIlIikNCmBgYA0KDQpgYGB7cn0NCg0KYGBgDQoNCmBgYHtyfQ0KDQpgYGANCg0K